REVIEWERS DO NOT RUN THIS CHUNK! THIS IS FOR DATA PRODUCT LEADS ONLY!
REVIEWERS PLEASE START HERE AND THANK YOU FOR YOUR EYES ON ALL THE DATA! :-)
# THIS LINK SHOULD BE UPDATED WITH THE MOST RECENT PASTA LINK FROM THE EDI STAGING ENVIRONMENT!
current_df <- read_csv('https://raw.githubusercontent.com/melofton/Reservoirs/refs/heads/master/Data/DataAlreadyUploadedToEDI/EDIProductionFiles/MakeEMLFluoroProbe/2024/FluoroProbe_2014_2024.csv') %>%
mutate(DateTime = force_tz(DateTime, tzone = "America/New_York"))
## Rows: 118502 Columns: 33
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Reservoir
## dbl (31): Site, CastID, Depth_m, GreenAlgae_ugL, Bluegreens_ugL, BrownAlgae...
## dttm (1): DateTime
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# double-check time zone
head(current_df$DateTime)
## [1] "2014-05-05 13:08:52 EDT" "2014-05-05 13:08:53 EDT"
## [3] "2014-05-05 13:08:54 EDT" "2014-05-05 13:08:55 EDT"
## [5] "2014-05-05 13:08:57 EDT" "2014-05-05 13:08:58 EDT"
hist(hour(current_df$DateTime))
this_year <- current_df %>%
filter(year(DateTime) == 2024)
This section checks to make sure each observation has a data flag. It also checks to make sure the frequency of flags match what we expect to see.
#make sure no NAS in the Flag columns
Flags=current_df%>%
select(DateTime, starts_with("Flag"))
RowsNA=Flags[!complete.cases(Flags), ] # Keep only the complete rows
#check the flag column
Flags=current_df%>%
select(starts_with("Flag"))
# Make a table with the number of times a flag was used
for(f in 1:(ncol(Flags))){
#print(colnames(Flags[f]))
print(table(Flags[,f], useNA = "always"))
}
## Flag_GreenAlgae_ugL
## 0 1 <NA>
## 99806 18696 0
## Flag_Bluegreens_ugL
## 0 1 <NA>
## 99806 18696 0
## Flag_BrownAlgae_ugL
## 0 1 <NA>
## 99806 18696 0
## Flag_MixedAlgae_ugL
## 0 1 <NA>
## 99806 18696 0
## Flag_YellowSubstances_ugL
## 0 1 <NA>
## 99806 18696 0
## Flag_TotalConc_ugL
## 0 1 <NA>
## 99806 18696 0
## Flag_Temp_C
## 0 2 <NA>
## 75538 42964 0
## Flag_Transmission_perc
## 0 4 <NA>
## 8272 110230 0
## Flag_RFU_525nm
## 0 5 <NA>
## 118065 437 0
## Flag_RFU_570nm
## 0 5 <NA>
## 117695 807 0
## Flag_RFU_610nm
## 0 5 <NA>
## 118411 91 0
## Flag_RFU_370nm
## 0 1 5 <NA>
## 99595 18696 211 0
## Flag_RFU_590nm
## 0 5 <NA>
## 118479 23 0
## Flag_RFU_470nm
## 0 1 5 <NA>
## 99438 18696 368 0
flora_heatmap <- function(fp_data, reservoir, year, site, z){
#subset to relevant data
fp <- fp_data %>%
filter(Reservoir == reservoir & year(DateTime) == year & Site == site) %>%
select(CastID, DateTime, Depth_m, {{z}})
#slice by depth for each reservoir
if (reservoir == "FCR"){
if(site == 50){
depths = seq(0.1, 9.3, by = 0.3)
} else if(site == 40){
depths = seq(0.1, 8.5, by = 0.3)
} else if(site == 30){
depths = seq(0.1, 7, by = 0.3)
} else if(site == 20){
depths = seq(0.1, 4.5, by = 0.3)
} else if(site == 10){
depths = seq(0.1, 3.5, by = 0.3)
}
df.final<-data.frame()
for (i in 1:length(depths)){
fp_layer <- fp %>%
group_by(CastID) %>%
slice(which.min(abs(as.numeric(Depth_m) - depths[i])))
# Bind each of the data layers together.
df.final = bind_rows(df.final, fp_layer)
}
} else if (reservoir == "BVR"){
depths = seq(0.1, 10, by = 0.3)
df.final<-data.frame()
for (i in 1:length(depths)){
fp_layer<-fp %>% group_by(CastID) %>% slice(which.min(abs(as.numeric(Depth_m) - depths[i])))
# Bind each of the data layers together.
df.final = bind_rows(df.final, fp_layer)
}
} else if(reservoir == "CCR"){
depths = seq(0.1, 20, by = 0.3)
df.final<-data.frame()
for (i in 1:length(depths)){
fp_layer<-fp %>% group_by(CastID) %>% slice(which.min(abs(as.numeric(Depth_m) - depths[i])))
# Bind each of the data layers together.
df.final = bind_rows(df.final, fp_layer)
}
} else if(reservoir == "GWR"){
depths = seq(0.1, 12, by = 0.3)
df.final<-data.frame()
for (i in 1:length(depths)){
fp_layer<-fp %>% group_by(CastID) %>% slice(which.min(abs(as.numeric(Depth_m) - depths[i])))
# Bind each of the data layers together.
df.final = bind_rows(df.final, fp_layer)
}
} else if(reservoir == "SHR"){
depths = seq(0.1, 30, by = 0.3)
df.final<-data.frame()
for (i in 1:length(depths)){
fp_layer<-fp %>% group_by(CastID) %>% slice(which.min(abs(as.numeric(Depth_m) - depths[i])))
# Bind each of the data layers together.
df.final = bind_rows(df.final, fp_layer)
}
}
#wrangle final dataframe for plotting
# Re-arrange the data frame by date
fp_new <- arrange(df.final, DateTime)
# Round each extracted depth to the nearest 10th.
fp_new$Depth_m <- round(as.numeric(fp_new$Depth_m), digits = 0.5)
# Convert to DOY
fp_new$DOY <- yday(fp_new$DateTime)
fig_title <- paste(reservoir, year, "Site", site, z, sep = " ")
interp <- interp(x=fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]),
xo = seq(min(fp_new$DOY), max(fp_new$DOY), by = .1),
yo = seq(min(fp_new$Depth_m), max(fp_new$Depth_m), by = 0.01),
extrap = T, linear = T, duplicate = "strip")
interp <- interp2xyz(interp, data.frame=T)
p1 <- ggplot(interp, aes(x=x, y=y))+
geom_raster(aes(fill=z))+
scale_y_reverse(expand = c(0,0))+
scale_x_continuous(expand = c(0, 0)) +
scale_fill_gradientn(colours = blue2green2red(60), na.value="gray")+
labs(x = "Day of year", y = "Depth (m)", title = fig_title,fill=expression(paste(mu,g/L)))+
theme_bw()
print(p1)
}
flora_heatmap(fp_data = current_df, reservoir = "FCR", year = 2024, site = 50, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter
flora_heatmap(fp_data = current_df, reservoir = "FCR", year = 2024, site = 40, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter
flora_heatmap(fp_data = current_df, reservoir = "FCR", year = 2024, site = 30, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter
flora_heatmap(fp_data = current_df, reservoir = "FCR", year = 2024, site = 20, z = "TotalConc_ugL")
flora_heatmap(fp_data = current_df, reservoir = "FCR", year = 2024, site = 10, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter
flora_heatmap(fp_data = current_df, reservoir = "BVR", year = 2024, site = 50, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter
flora_heatmap(fp_data = current_df, reservoir = "FCR", year = 2016, site = 50, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter
flora_heatmap(fp_data = current_df, reservoir = "BVR", year = 2016, site = 50, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter
flora_heatmap(fp_data = current_df, reservoir = "CCR", year = 2016, site = 50, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter
flora_heatmap(fp_data = current_df, reservoir = "SHR", year = 2016, site = 50, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter
flora_heatmap(fp_data = current_df, reservoir = "GWR", year = 2016, site = 50, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter
current_df |>
select(c('DateTime','Depth_m', 'Reservoir', 'Site', starts_with('RFU'))) |>
pivot_longer(RFU_370nm:RFU_610nm, values_to = 'RFU', names_to = 'wavelength') |>
filter(RFU < 0) |>
ggplot(aes(x=DateTime, y= Depth_m, colour = as_factor(wavelength))) +
facet_wrap(Reservoir~Site) +
geom_point()
current_df |>
select(c('DateTime','Depth_m', 'Reservoir', 'Site', starts_with('RFU'))) |>
pivot_longer(RFU_370nm:RFU_610nm, values_to = 'RFU', names_to = 'wavelength') |>
filter(RFU < 0) |>
reframe(.by = wavelength,
n = n())
## # A tibble: 6 × 2
## wavelength n
## <chr> <int>
## 1 RFU_570nm 807
## 2 RFU_610nm 91
## 3 RFU_525nm 437
## 4 RFU_590nm 23
## 5 RFU_370nm 218
## 6 RFU_470nm 375
We want to make sure that our maintenance log actually worked and took out the values or changes those it was supposed to
## Rows: 28 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): Reservoir, DataStream, TIMESTAMP_start, TIMESTAMP_end, start_parame...
## dbl (2): Site, flag
## lgl (2): Depth, update_value
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## # A tibble: 6 × 11
## Reservoir Site Depth DataStream TIMESTAMP_start TIMESTAMP_end start_parameter
## <chr> <dbl> <lgl> <chr> <chr> <chr> <chr>
## 1 FCR 20 NA Flora 2014-05-01 00:… 2023-12-31 2… Transmission_p…
## 2 FCR 10 NA Flora 2014-05-01 00:… 2023-12-31 2… Transmission_p…
## 3 BVR 50 NA Flora 2014-05-01 00:… 2023-12-31 1… Transmission_p…
## 4 CCR 50 NA Flora 2014-05-01 00:… 2023-12-31 2… Transmission_p…
## 5 SHR 50 NA Flora 2014-05-01 00:… 2023-12-31 2… Transmission_p…
## 6 GWR 50 NA Flora 2014-05-01 00:… 2023-12-31 2… Transmission_p…
## # ℹ 4 more variables: end_parameter <chr>, flag <dbl>, update_value <lgl>,
## # notes <chr>
| Reservoir | Site | Depth | DataStream | TIMESTAMP_start | TIMESTAMP_end | start_parameter | end_parameter | flag | update_value | notes |
|---|---|---|---|---|---|---|---|---|---|---|
| FCR | 20 | NA | Flora | 2014-05-01 00:00:00 EDT | 2023-12-31 23:59:59 EDT | Transmission_perc | Transmission_perc | 4 | NA | bad transmission sensor calibration |
| FCR | 10 | NA | Flora | 2014-05-01 00:00:00 EDT | 2023-12-31 23:59:59 EDT | Transmission_perc | Transmission_perc | 4 | NA | bad transmission sensor calibration |
| BVR | 50 | NA | Flora | 2014-05-01 00:00:00 EDT | 2023-12-31 11:59:59 EDT | Transmission_perc | Transmission_perc | 4 | NA | bad transmission sensor calibration |
| CCR | 50 | NA | Flora | 2014-05-01 00:00:00 EDT | 2023-12-31 23:59:59 EDT | Transmission_perc | Transmission_perc | 4 | NA | bad transmission sensor calibration |
| SHR | 50 | NA | Flora | 2014-05-01 00:00:00 EDT | 2023-12-31 23:59:59 EDT | Transmission_perc | Transmission_perc | 4 | NA | bad transmission sensor calibration |
| GWR | 50 | NA | Flora | 2014-05-01 00:00:00 EDT | 2023-12-31 23:59:59 EDT | Transmission_perc | Transmission_perc | 4 | NA | bad transmission sensor calibration |
Look at the first few rows of the data frame and check that the observations after the TIMESTAMP_start are flagged
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(colname_start)
##
## # Now:
## data %>% select(all_of(colname_start))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(colname_end)
##
## # Now:
## data %>% select(all_of(colname_end))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
| Reservoir | DateTime | Transmission_perc | Flag_Transmission_perc |
|---|---|---|---|
| FCR | 2014-05-05 13:08:52 | 78.00 | 4 |
| FCR | 2014-05-05 13:08:53 | 95.32 | 4 |
| FCR | 2014-05-05 13:08:54 | 97.31 | 4 |
| FCR | 2014-05-05 13:08:55 | 94.15 | 4 |
| FCR | 2014-05-05 13:08:57 | 95.98 | 4 |
| FCR | 2014-05-05 13:08:58 | 95.74 | 4 |
Make sure the observations are flagged
| Reservoir | DateTime | Transmission_perc | Flag_Transmission_perc |
|---|---|---|---|
| FCR | 2023-11-14 14:07:50 | 100 | 4 |
| FCR | 2023-11-14 14:07:52 | 100 | 4 |
| FCR | 2023-11-14 14:07:55 | 100 | 4 |
| FCR | 2023-11-14 14:07:57 | 100 | 4 |
| FCR | 2023-11-14 14:08:00 | 100 | 4 |
| FCR | 2023-11-14 14:08:02 | 100 | 4 |
# These lines of code make the csv of the site descriptions with lat and long
# MEL You don't need to run this if you already have the file I believe?
# # Use Gsheet because you don't need to authenticate it.
# sites <- gsheet::gsheet2tbl("https://docs.google.com/spreadsheets/d/1TlQRdjmi_lzwFfQ6Ovv1CAozmCEkHumDmbg_L4A2e-8/edit#gid=1244423834")
# #data<- read_csv("YOUR DATA.csv")# Use this if you read in a csv
# data <- current_df #This is the line you need to modify!
# trim_sites = function(data,sites){
# data_res_site=data%>% #Create a Reservoir/Site combo column
# mutate(res_site = trimws(paste0(Reservoir,Site)))
# sites_merged = sites%>% #Filter to Sites that are in the dataframe
# mutate(res_site = trimws(paste0(Reservoir,Site)))%>%
# filter(res_site%in%data_res_site$res_site)%>%
# select(-res_site)
# }
# sites_trimmed = trim_sites(data,sites)
# write.csv(sites_trimmed,"site_descriptions.csv", row.names=F)# Write to file
download.file("https://raw.githubusercontent.com/melofton/Reservoirs/master/Scripts/L1_functions/fluoroprobe_create.R", "FluoroProbe_qaqc_2014_2024.R")